Observed data

Observed log-chlorophyll at representative station in SF Bay Delta region.

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sf_fldat)
fl_dat <- sf_fldat %>% 
  rename(date = Date) %>% 
  filter(station %in% 'sac') %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )
  
# format the data to model
data(sf_dat)
sf_mod <- sf_dat %>% 
  filter(Site_Code %in% 'C3') %>% 
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate( # all variables ln-transformed
    flo = log(qsm), 
    chl = log(chl), 
    tss = log(tss), 
    nh = log(nh)
    ) %>% 
  select(-q, -qsm, -station, -Latitude, -Longitude, -Location)

# plot, all
p <- ggplot(sf_mod, aes(x = date, y = chl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_mod, aes(x = yr, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_mod, aes(x = mo, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Adding nitrogen and turbidity covariates

# formula for best annual, seasonal, flow model
strt <- best2$formula %>% 
  as.character

smths <- c(
  "s(nh, bs = 'tp')",  
  "s(tss, bs = 'tp')",
  "te(nh, tss, bs = c('tp', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
  
 frm <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ') %>% 
        paste('chl ~', strt[3], '+',  .) %>% 
        formula
    }) 
  
 frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sf_mod, 
    na.action = na.exclude
  )

})
names(mods3) <- paste0('mod', seq_along(mods3))

Summary stats of annual, seasonal, flow, nitrogen, turbidity models:

# best model with season, year, flow
best3 <- map(mods3, AIC) %>% 
  unlist %>% 
  which.min %>% 
  mods3[[.]] 

best <- list(best1 = best1, best2 = best2, best3 = best3)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
best1 s(dec_time) 7.332864 8.308760 9.2780106 0.0000000
best1 s(doy) 3.385611 8.000000 5.2844996 0.0000000
best1 te(dec_time,doy) 6.637691 15.000000 1.4177080 0.0003557
best2 s(dec_time) 6.587614 7.657166 8.8044126 0.0000000
best2 s(doy) 3.262079 8.000000 1.1046635 0.0010105
best2 te(flo,dec_time) 10.316629 20.000000 3.8902702 0.0000000
best2 te(dec_time,doy) 8.322306 15.000000 2.1431672 0.0000026
best2 te(dec_time,doy,flo) 8.912490 32.000000 0.9655715 0.0000223
best3 s(dec_time) 7.180711 8.155938 10.2448602 0.0000000
best3 s(doy) 3.350158 8.000000 3.0203304 0.0000000
best3 te(flo,dec_time) 5.415305 20.000000 1.3536281 0.0000000
best3 te(dec_time,doy) 1.868603 15.000000 0.1308509 0.0301381
best3 te(dec_time,doy,flo) 23.950789 60.000000 1.1852758 0.0000000
best3 s(nh) 1.000000 1.000000 1.2562915 0.2628919
best3 s(tss) 1.001143 1.002228 21.8136411 0.0000038
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
best1 971.9230 0.4755770
best2 869.6463 0.5796506
best3 829.8335 0.5970295
# predictions
sf_res3 <- map(best, function(x){
  sf_mod %>% 
    mutate(
      pred = predict(x)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sf_res3, aes(x = date)) + 
  geom_point(data = sf_mod, aes(y = chl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)